# Load the joined user-activity table and restrict it to the study period ----

# Attach data.table up front: library() stops with an error when the package
# is missing, whereas require() only returns FALSE and lets the script fail
# later with a less helpful message.
library(data.table)

print("Loading data...")
load("02_09_m5s_join_user_activity_df_univ_only.RData")

# Fail fast if the .RData file did not provide the object used below.
stopifnot(exists("join_user_activity_df_univ_only"))

#head(join_user_activity_df_univ_only)

join_user_activity_df_univ_only <- data.table(join_user_activity_df_univ_only)

# Exclusive upper bound on `date`, which holds seconds since 1970-01-01:
# 1422748800 is 2015-02-01 00:00:00 UTC. Rows with a missing `date` are
# dropped as well, as they were with subset().
activity_cutoff_epoch <- 1422748800
join_user_activity_df_univ_only <-
  join_user_activity_df_univ_only[date < activity_cutoff_epoch]

# Per-user activity summary ----
# One row per (universal_id, gender) pair holding the smallest and largest
# `date`, the number of records, and the number of records per source and
# per activity type.
user_activity_univ_only_timeframe <- join_user_activity_df_univ_only[
  ,
  .(
    min = min(date),
    max = max(date),
    n = .N,
    # Records per source
    facebook = sum(source == "facebook"),
    meetup = sum(source == "meetup"),
    blog = sum(source == "blog"),
    forum = sum(source == "forum"),
    # Records per activity type
    like = sum(activity == "like"),
    comment = sum(activity == "comment"),
    post = sum(activity == "post"),
    rsvp = sum(activity == "rsvp"),
    join = sum(activity == "join")
  ),
  by = c("universal_id", "gender")
]

# Span between a user's first and last record, in the same unit as `date`.
user_activity_univ_only_timeframe[, timediff := max - min]

save(
  user_activity_univ_only_timeframe,
  file = "02_11_m5s_user_activity_univ_only_timeframe.RData"
)

# Timeseries by user ----
# Weekly number of distinct active users, computed for every source, every
# activity type, and a few selected source/activity combinations.

# Category labels. Neither name is defined in this script, so they are only
# created here when an earlier step (the loaded .RData or a calling script)
# has not already supplied them. The fallback values mirror the categories
# counted in the per-user summary above.
# NOTE(review): confirm these are the intended category lists.
if (!exists("sources")) {
  sources <- c("facebook", "meetup", "blog", "forum")
}
if (!exists("activities")) {
  activities <- c("like", "comment", "post", "rsvp", "join")
}

# Weekly bin boundaries starting on 2005-01-01. cut() on Dates builds
# left-closed bins [break_i, break_i+1) and returns NA for anything on or
# after the last break. Stepping by 7 from 2005-01-01 towards 2015-02-01
# stops at 2015-01-31, which sent every event of that day to the NA bin;
# adding 6 days to the upper limit guarantees a closing break on or after
# 2015-02-01. Events dated before 2005-01-01 still fall into the NA bin.
sequence <- seq(as.Date("2005-01-01"), as.Date("2015-02-01") + 6, by = 7)

# Summarise one slice of the activity table.
#   activity_dt: data.table with columns `universal_id` and `date`
#                (`date` in seconds since 1970-01-01).
#   week_breaks: Date vector of bin boundaries passed to cut().
# Returns a list with `n` (distinct users in the slice) and `ts` (a
# data.table with the bin label `from` and `active_users`, the number of
# distinct users seen in that bin). The input table is not modified.
count_active_users <- function(activity_dt, week_breaks) {
  list(
    n = length(unique(activity_dt$universal_id)),
    ts = activity_dt[
      ,
      .(active_users = length(unique(universal_id))),
      by = .(from = cut(
        as.Date(as.POSIXct(date, origin = "1970-01-01")),
        week_breaks
      ))
    ]
  )
}

unique_id_timeseries <- list()

# One entry per single source or activity, keyed by its label.
for (what in c(sources, activities)) {
  col <- if (what %in% sources) "source" else "activity"
  tmp <- subset(join_user_activity_df_univ_only, get(col) == what)
  unique_id_timeseries[[what]] <- count_active_users(tmp, sequence)
}

# One entry per source/activity pair, keyed "<source>_<activity>".
lst_combinations <- list(
  c("facebook", "post"), c("facebook", "comment"), c("facebook", "like"),
  c("blog", "post"), c("blog", "comment")
)

for (what in lst_combinations) {
  key <- paste0(what[1], "_", what[2])
  tmp <- subset(
    join_user_activity_df_univ_only,
    source == what[1] & activity == what[2]
  )
  unique_id_timeseries[[key]] <- count_active_users(tmp, sequence)
}

save(
  unique_id_timeseries,
  file = "02_11_m5s_user_activity_univ_only_timeseries.RData"
)
